data <- makeExampleData()
class(data)
## [1] "list"
class(data$view_1)
## [1] "matrix"
data$view_1[1:5, 1:5]
## sample_1 sample_2 sample_3 sample_4 sample_5
## feature1 1.04292020 0.42254613 0.3651112 -0.7398621 -0.85776528
## feature2 -0.48673662 0.08065751 0.4987665 0.5177779 0.85860367
## feature3 0.79444546 -0.15074124 -0.4359495 -1.3791868 -0.77422531
## feature4 -0.04001955 -0.15697565 0.2672643 0.5078698 0.16426913
## feature5 0.35637070 0.17460373 -0.2218602 -0.3179047 -0.03301396
data$view_2[1:5, 1:5]
## sample_1 sample_2 sample_3 sample_4 sample_5
## feature1 -0.30478573 0.18945287 1.2142699 -0.02885239 1.698523220
## feature2 -1.29607501 -0.35929397 0.1772687 0.29080186 -0.008588118
## feature3 1.17441443 0.49384601 0.1681526 1.17310775 -1.123760451
## feature4 1.80477274 -0.06750705 -1.2368071 0.38068882 -1.146340648
## feature5 -0.00791598 0.16114470 -0.3845192 0.70679625 -0.375288005
data$view_3[1:5, 1:5]
## sample_1 sample_2 sample_3 sample_4 sample_5
## feature1 0.2612291 -0.08213791 0.2666322 0.04455182 -0.12219337
## feature2 -0.4166879 0.29339382 0.1228433 -0.51984874 0.43640303
## feature3 -0.6145921 -0.69143575 -0.2048873 -0.32561906 -0.04718821
## feature4 -0.3467249 0.43780264 2.0658769 -0.64374345 1.79271128
## feature5 0.3152318 -0.07858969 0.4348445 -0.54362060 -0.17295689
summary(data$view_1[,1:10])
## sample_1 sample_2 sample_3 sample_4
## Min. :-2.45643 Min. :-0.914858 Min. :-0.97372 Min. :-1.89039
## 1st Qu.:-0.29010 1st Qu.:-0.257194 1st Qu.:-0.20457 1st Qu.:-0.21977
## Median :-0.01496 Median : 0.003731 Median : 0.03626 Median : 0.03563
## Mean :-0.01638 Mean :-0.007379 Mean : 0.05734 Mean : 0.07871
## 3rd Qu.: 0.32315 3rd Qu.: 0.222240 3rd Qu.: 0.28475 3rd Qu.: 0.36983
## Max. : 1.26031 Max. : 0.817327 Max. : 0.76781 Max. : 2.88071
## sample_5 sample_6 sample_7 sample_8
## Min. :-1.86196 Min. :-2.7456 Min. :-0.89870 Min. :-3.59294
## 1st Qu.:-0.37314 1st Qu.:-0.3381 1st Qu.:-0.31557 1st Qu.:-0.49843
## Median : 0.02583 Median : 0.1538 Median : 0.02052 Median :-0.02190
## Mean : 0.07326 Mean : 0.1024 Mean :-0.01743 Mean :-0.07599
## 3rd Qu.: 0.44331 3rd Qu.: 0.5084 3rd Qu.: 0.19943 3rd Qu.: 0.30064
## Max. : 2.83617 Max. : 3.2860 Max. : 1.02104 Max. : 2.64937
## sample_9 sample_10
## Min. :-0.95906 Min. :-4.91373
## 1st Qu.:-0.19187 1st Qu.:-0.50626
## Median :-0.02509 Median : 0.03421
## Mean :-0.01973 Mean :-0.10279
## 3rd Qu.: 0.17396 3rd Qu.: 0.46511
## Max. : 0.89147 Max. : 3.61538
set.seed(1234)
MOFAobject <- createMOFAobject(data)
## Creating MOFA object from list of matrices,
## please make sure that samples are columns and features are rows...
plotDataOverview(MOFAobject)
TrainOptions <- getDefaultTrainOptions()
ModelOptions <- getDefaultModelOptions(MOFAobject)
DataOptions <- getDefaultDataOptions()
TrainOptions$DropFactorThreshold <- 0.01
n_inits <- 3
MOFAlist <- lapply(seq_len(n_inits), function(it) {
TrainOptions$seed <- 2018 + it
MOFAobject <- prepareMOFA(MOFAobject,
DataOptions = DataOptions,
ModelOptions = ModelOptions,
TrainOptions = TrainOptions
)
runMOFA(MOFAobject)
})
## Checking data options...
## Checking training options...
## Checking model options...
## [1] "No output file provided, using a temporary file..."
## Checking data options...
## Checking training options...
## Checking model options...
## [1] "No output file provided, using a temporary file..."
## Checking data options...
## Checking training options...
## Checking model options...
## [1] "No output file provided, using a temporary file..."
compareModels(MOFAlist)
With compareFactors we can get an overview of how robust the factors are between different model instances.
compareFactors(MOFAlist)
For down-stream analyses we recommned to choose the model with the best ELBO value as is done by selectModel.
MOFAobject <- selectModel(MOFAlist, plotit = FALSE)
MOFAobject
## Trained MOFA model with the following characteristics:
## Number of views: 3
## View names: view_1 view_2 view_3
## Number of features per view: 100 100 100
## Number of samples: 50
## Number of factors: 5
On the trained MOFAobject we can now start looking into the inferred factors, its weights etc. Here the data was generated using five factors, whose activity patterns we can recover using plotVarianceExplained.
plotVarianceExplained(MOFAobject)
library(dplyr)
library(tidyr)
prcnt <- 0.1 # cutoff: genes must be mutated in prcnt*100 % of the samples
mut_dt_sum <- as_tibble(mut_dt) %>%
mutate(sum = rowSums(., na.rm = TRUE),
genes = rownames(mut_dt)) %>%
arrange(., desc(sum)) %>%
select(genes, sum) %>%
filter(., sum > 53*prcnt)
mut_dt <- mut_dt[mut_dt_sum$genes,]
mofa_data <- list('CyTOF_exp'=CyTOF_exp,
'CyTOF_prcnt'=CyTOF_prcnt,
'RNA_topgenes'=RNA_topclust_E,
'RNA_xcell'=RNA_xcell,
'mutation'=mut_dt)
set.seed(1234)
MOFAobject <- createMOFAobject(mofa_data)
## Creating MOFA object from list of matrices,
## please make sure that samples are columns and features are rows...
plotDataOverview(MOFAobject)
The most important options the user needs to define are:
scaleViews: logical indicating whether to scale views to have unit variance. As long as the scale of the different data sets is not too high, this is not required. Default is FALSE.
removeIncompleteSamples: logical indicating whether to remove samples that are not profiled in all omics. The model can cope with missing assays, so this option is not required. Default is FALSE.
DataOptions <- getDefaultDataOptions()
DataOptions
## $scaleViews
## [1] FALSE
##
## $removeIncompleteSamples
## [1] FALSE
Next, we define model options. The most important are:
numFactors: number of factors (default is 0.5 times the number of samples). By default, the model will only remove a factor if it explains exactly zero variance in the data. You can increase this threshold on minimum variance explained by setting TrainOptions$dropFactorThreshold to a value higher than zero.
likelihoods: likelihood for each view. Usually we recommend gaussian for continuous data, bernoulli for binary data and poisson for count data. By default, the model tries to guess it from the data.
sparsity: do you want to use sparsity? This makes the interpretation easier so it is recommended (Default is TRUE).
ModelOptions <- getDefaultModelOptions(MOFAobject)
ModelOptions
## $likelihood
## CyTOF_exp CyTOF_prcnt RNA_topgenes RNA_xcell mutation
## "gaussian" "gaussian" "gaussian" "gaussian" "bernoulli"
##
## $numFactors
## [1] 45
##
## $sparsity
## [1] TRUE
Next, we define training options. The most important are:
maxiter: maximum number of iterations. Ideally set it large enough and use the convergence criterion TrainOptions$tolerance.
tolerance: convergence threshold based on change in the evidence lower bound. For an exploratory run you can use a value between 1.0 and 0.1, but for a “final” model we recommend a value of 0.01.
DropFactorThreshold: hyperparameter to automatically learn the number of factors based on a minimum variance explained criteria. Factors explaining less than DropFactorThreshold fraction of variation in all views will be removed. For example, a value of 0.01 means that factors that explain less than 1% of variance in all views will be discarded. By default this it zero, meaning that all factors are kept unless they explain no variance at all.
TrainOptions <- getDefaultTrainOptions()
# Automatically drop factors that explain less than 2% of variance in all omics
TrainOptions$DropFactorThreshold <- 0.02
TrainOptions$seed <- 2017
TrainOptions
## $maxiter
## [1] 5000
##
## $tolerance
## [1] 0.1
##
## $DropFactorThreshold
## [1] 0.02
##
## $verbose
## [1] FALSE
##
## $seed
## [1] 2017
prepareMOFA internally performs a set of sanity checks and fills the DataOptions, TrainOptions and ModelOptions slots of the MOFAobject
MOFAobject <- prepareMOFA(
MOFAobject,
DataOptions = DataOptions,
ModelOptions = ModelOptions,
TrainOptions = TrainOptions
)
## Checking data options...
## Checking training options...
## Checking model options...
MOFAobject <- runMOFA(MOFAobject)
## [1] "No output file provided, using a temporary file..."
Disentangling the heterogeneity: calculation of variance explained by each factor in each view
# Calculate the variance explained (R2) per factor in each view
r2 <- calculateVarianceExplained(MOFAobject)
r2$R2Total
## CyTOF_exp CyTOF_prcnt RNA_topgenes RNA_xcell mutation
## 0.5552340 0.3183194 0.8501937 0.7161544 0.3137230
# Variance explained by each factor in each view
head(r2$R2PerFactor)
## CyTOF_exp CyTOF_prcnt RNA_topgenes RNA_xcell mutation
## LF1 3.472188e-01 0.1421290604 1.369124e-05 1.196426e-05 2.508180e-05
## LF2 2.666047e-05 0.0004605097 7.057021e-06 4.080459e-01 4.078439e-05
## LF3 2.050518e-05 0.0028526423 7.581071e-03 2.046978e-02 3.131551e-01
## LF4 3.276518e-05 0.0024545328 2.188586e-01 1.021090e-01 3.508591e-05
## LF5 5.244941e-05 0.0004696920 3.118061e-01 5.899720e-03 3.953266e-05
## LF6 6.214907e-05 0.0012019308 1.925714e-01 4.288120e-02 3.009597e-05
# Plot it
plotVarianceExplained(MOFAobject)
To get an overview of the weights across all factors in a given view you can use the plotWeightsHeatmap function.
plotWeightsHeatmap(
MOFAobject,
view = "CyTOF_prcnt",
factors = 1:5,
show_colnames = T, main = 'CyTOF_prcnt'
)
plotTopWeights(
MOFAobject,
view="CyTOF_prcnt",
factor=3
)
plotTopWeights(
MOFAobject,
view="CyTOF_prcnt",
factor=5
)
plotTopWeights(
MOFAobject,
view="CyTOF_prcnt",
factor=1
)
plotTopWeights(
MOFAobject,
view="CyTOF_prcnt",
factor=2
)
plotWeightsHeatmap(
MOFAobject,
view = "CyTOF_exp",
factors = 1:5,
show_colnames = FALSE, main = 'CyTOF_exp'
)
plotTopWeights(
MOFAobject,
view="CyTOF_exp",
factor=3
)
plotTopWeights(
MOFAobject,
view="CyTOF_exp",
factor=5
)
plotTopWeights(
MOFAobject,
view="CyTOF_exp",
factor=1
)
plotTopWeights(
MOFAobject,
view="CyTOF_exp",
factor=2
)
plotWeightsHeatmap(
MOFAobject,
view = "RNA_topgenes",
factors = 1:5,
show_colnames = FALSE, main = 'RNA_topgenes'
)
plotTopWeights(
MOFAobject,
view="RNA_topgenes",
factor=1
)
plotWeightsHeatmap(
MOFAobject,
view = "RNA_xcell",
factors = 1:5,
show_colnames = T, main = 'RNA_xcell'
)
plotTopWeights(
MOFAobject,
view="RNA_xcell",
factor=2
)
plotTopWeights(
MOFAobject,
view="RNA_xcell",
factor=1
)
plotTopWeights(
MOFAobject,
view="RNA_xcell",
factor=4
)
plotTopWeights(
MOFAobject,
view="RNA_xcell",
factor=5
)
plotWeightsHeatmap(
MOFAobject,
view = "mutation",
factors = 1:5,
show_colnames = F, main = 'Mutation Data'
)
plotTopWeights(
MOFAobject,
view="mutation",
factor=1
)
# Load reactome annotations
data("reactomeGS") # binary matrix with feature sets in rows and features in columns
# perform enrichment analysis
gsea <- runEnrichmentAnalysis(
MOFAobject,
view = "RNA_topgenes",
feature.sets = reactomeGS,
alpha = 0.01
)
## Doing Feature Set Enrichment Analysis with the following options...
## View: RNA_topgenes
## Factors: LF1 LF2 LF3 LF4 LF5 LF6 LF7 LF8 LF9 LF10 LF11 LF12 LF13
## Number of feature sets: 55
## Local statistic: loading
## Transformation: abs.value
## Global statistic: mean.diff
## Statistical test: parametric
plotEnrichmentBars(gsea, alpha=0.01)
interestingFactors <- 1:2
fseaplots <- lapply(interestingFactors, function(factor) {
plotEnrichment(
MOFAobject,
gsea,
factor = factor,
alpha = 0.01,
max.pathways = 10 # The top number of pathways to display
)
})
## Warning in plotEnrichment(MOFAobject, gsea, factor = factor, alpha = 0.01, : No siginificant pathways at the specified alpha threshold.
##
## For an overview use plotEnrichmentHeatmap() or plotEnrichmentBars().
## Warning in plotEnrichment(MOFAobject, gsea, factor = factor, alpha = 0.01, : No siginificant pathways at the specified alpha threshold.
##
## For an overview use plotEnrichmentHeatmap() or plotEnrichmentBars().
cowplot::plot_grid(fseaplots[[1]], fseaplots[[2]],
ncol = 1, labels = paste("Factor", interestingFactors))
interestingFactors <- 3:4
fseaplots <- lapply(interestingFactors, function(factor) {
plotEnrichment(
MOFAobject,
gsea,
factor = factor,
alpha = 0.01,
max.pathways = 10 # The top number of pathways to display
)
})
## Warning in plotEnrichment(MOFAobject, gsea, factor = factor, alpha = 0.01, : No siginificant pathways at the specified alpha threshold.
##
## For an overview use plotEnrichmentHeatmap() or plotEnrichmentBars().
cowplot::plot_grid(fseaplots[[1]], fseaplots[[2]],
ncol = 1, labels = paste("Factor", interestingFactors))
interestingFactors <- 5:6
fseaplots <- lapply(interestingFactors, function(factor) {
plotEnrichment(
MOFAobject,
gsea,
factor = factor,
alpha = 0.01,
max.pathways = 10 # The top number of pathways to display
)
})
cowplot::plot_grid(fseaplots[[1]], fseaplots[[2]],
ncol = 1, labels = paste("Factor", interestingFactors))
plotFactorScatter(
MOFAobject,
factors = 1:2,
color_by = "ImmuneScore" # color by the IGHV values that are part of the training data
#shape_by = "trisomy12" # shape by the trisomy12 values that are part of the training data
)
plotFactorScatters(
MOFAobject,
factors = c(1:5),
color_by = "ImmuneScore"
)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
plotFactorScatters(
MOFAobject,
factors = c(1:5),
color_by = "Th_cells"
)
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
plotFactorScatters(
MOFAobject,
factors = c(1:5),
color_by = "Tc_cells"
)
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
plotFactorScatters(
MOFAobject,
factors = c(1:5),
color_by = "Epi_174Yb_HLA-DR"
)
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
plotFactorScatters(
MOFAobject,
factors = c(1:5),
color_by = "KRAS"
)
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
plotFactorScatters(
MOFAobject,
factors = c(1:5),
color_by = "EGFR"
)
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).